import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import librosa # Library for audio feature extraction
import librosa.display
from IPython.display import Audio # Display audio in Jupyter notebooks
import IPython.display as ipd
import warnings
# Suppress all warnings to keep notebook output clean.
# NOTE(review): blanket suppression also hides deprecation warnings
# (several TF/Keras ones are visible in the training logs) - consider
# narrowing to specific categories.
warnings.filterwarnings('ignore')
# Root directory containing one sub-folder per emotion class.
data_path = r"C:/Users/Nagham/Downloads/deep learning/project/voice"

# Collect every audio file path together with its emotion label; the
# label is simply the name of the folder the file lives in.
paths = []
labels = []
for folder, _, files in os.walk(data_path):
    emotion = os.path.basename(folder)  # folder name == emotion label
    for name in files:
        paths.append(os.path.join(folder, name))
        labels.append(emotion)

# Report how many audio files were found.
print(len(paths))
117
# Peek at the first five collected file paths (notebook display).
paths[:5]
['C:/Users/Nagham/Downloads/deep learning/project/voice\\Angry\\101411.wav', 'C:/Users/Nagham/Downloads/deep learning/project/voice\\Angry\\3456.wav', 'C:/Users/Nagham/Downloads/deep learning/project/voice\\Angry\\anyconv-com-img-9315-vocals.wav', 'C:/Users/Nagham/Downloads/deep learning/project/voice\\Angry\\AnyConv.com__IMG_9281.wav', 'C:/Users/Nagham/Downloads/deep learning/project/voice\\Angry\\AnyConv.com__IMG_9286.wav']
# Build a DataFrame pairing each emotion label with its audio file path.
# The constructor creates both columns in one step, so the previous
# redundant re-assignments of df['Emotions'] and df['path'] after the
# fact have been removed - they rewrote identical data.
df = pd.DataFrame({'Emotions': labels, 'path': paths})

# Pick one random example path per unique emotion (notebook display).
df_one_per_emotion = (
    df.groupby('Emotions')
      .apply(lambda group: group.sample(1))
      .reset_index(drop=True)
)
df_one_per_emotion
| Emotions | path | |
|---|---|---|
| 0 | Angry | C:/Users/Nagham/Downloads/deep learning/projec... |
| 1 | Shock | C:/Users/Nagham/Downloads/deep learning/projec... |
| 2 | excited | C:/Users/Nagham/Downloads/deep learning/projec... |
| 3 | happy | C:/Users/Nagham/Downloads/deep learning/projec... |
| 4 | love | C:/Users/Nagham/Downloads/deep learning/projec... |
| 5 | nervous | C:/Users/Nagham/Downloads/deep learning/projec... |
| 6 | sad | C:/Users/Nagham/Downloads/deep learning/projec... |
| 7 | scared | C:/Users/Nagham/Downloads/deep learning/projec... |
# Count how many clips belong to each emotion (notebook display);
# the classes are imbalanced (Shock: 21 ... scared: 8 per the output).
df['Emotions'].value_counts()
Emotions Shock 21 Angry 20 happy 16 excited 15 sad 15 nervous 12 love 10 scared 8 Name: count, dtype: int64
# Bar chart of the class distribution, one bar per emotion.
sns.countplot(data=df, x='Emotions')
<Axes: xlabel='Emotions', ylabel='count'>
def _show_waveform(audio_path, title):
    """Plot the waveform of one audio clip and return an in-notebook player.

    Parameters
    ----------
    audio_path : str
        Path to a .wav file readable by librosa (loaded at librosa's
        default 22050 Hz sample rate).
    title : str
        Plot title (the emotion name).

    Returns
    -------
    IPython.display.Audio
        Playable audio widget (rendered when it is a cell's last value).
    """
    plt.figure(figsize=(14, 5))
    data, sample_rate = librosa.load(audio_path)
    librosa.display.waveshow(data, sr=sample_rate)
    plt.title(title)
    plt.xlabel('Time (s)')
    plt.show()
    return ipd.Audio(audio_path)

# One example clip per emotion class.  This replaces eight copy-pasted
# cells that differed only in path and title.  Adjust paths as needed.
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/Angry/101411.wav", 'Angry')
_show_waveform('C:/Users/Nagham/Downloads/deep learning/project/voice/excited/AnyConv.com__IMG_9278.wav', 'Excited')
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/happy/AnyConv.com__IMG_9437.wav", 'happy')
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/love/img-92477.wav", 'love')
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/nervous/AnyConv.com__IMG_9458.wav", 'Nervous')
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/sad/AnyConv.com__IMG_9453.wav", 'Sad')
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/scared/AnyConv.com__IMG_9309.wav", 'Scared')
_show_waveform("C:/Users/Nagham/Downloads/deep learning/project/voice/Shock/AnyConv.com__IMG_9426.wav", 'Shock')
def extract_mfcc(filename, duration=11, offset=0.2, n_mfcc=40):
    """Return the time-averaged MFCC feature vector of an audio file.

    Parameters
    ----------
    filename : str
        Path to the audio file.
    duration : float, optional
        Seconds of audio to load (default 11, as before).
    offset : float, optional
        Seconds to skip at the start of the clip (default 0.2).
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray
        1-D array of shape (n_mfcc,): each coefficient averaged over time.
    """
    y, sr = librosa.load(filename, duration=duration, offset=offset)
    # Mean over the time axis collapses (n_mfcc, n_frames) -> (n_mfcc,).
    return np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T, axis=0)

# Example usage: extract MFCCs for the first file in the DataFrame
# (notebook display).
extract_mfcc(df['path'][0])
array([-2.55606689e+02, 1.11070755e+02, -4.99097633e+01, 2.75862026e+01,
-2.34350929e+01, 7.19156456e+00, -1.04140625e+01, -6.83629990e+00,
-6.11068010e+00, -2.15051579e+00, -7.74954796e+00, 3.26797271e+00,
-2.41084456e+00, -1.12567444e+01, -6.14663219e+00, -6.88144636e+00,
-8.27416611e+00, -3.11036539e+00, 2.04550600e+00, 7.96155882e+00,
6.83683538e+00, 9.39081955e+00, 4.00762224e+00, 3.08250523e+00,
3.65168929e+00, 7.88768530e-01, -2.36914024e-01, -3.77300680e-01,
-1.32614172e+00, 4.49824959e-01, -1.40432000e-01, -1.31372404e+00,
-1.58230531e+00, 6.48146927e-01, 1.98375404e+00, 1.27283943e+00,
-3.39046359e-01, -1.67415309e+00, 1.73153639e+00, 3.98780680e+00],
dtype=float32)
# Compute the 40-dim mean-MFCC feature vector for every file in the
# 'path' column.  `apply` passes each path straight to extract_mfcc;
# the previous `lambda x: extract_mfcc(x)` wrapper was redundant.
X_mfcc = df['path'].apply(extract_mfcc)
# Notebook display: a Series of 1-D arrays, one per audio file.
X_mfcc
0 [-255.60669, 111.070755, -49.909763, 27.586203...
1 [-127.6092, 72.77756, -37.640472, 11.209493, -...
2 [-232.47989, 31.925543, -52.232925, -9.251359,...
3 [-187.93697, 102.8586, -15.588125, 12.266582, ...
4 [-205.7702, 76.35817, -9.235604, 15.009566, -7...
...
112 [-187.44896, 61.173885, -23.143469, 12.28747, ...
113 [-231.38158, 89.49133, -53.52144, 14.127972, -...
114 [-149.52303, 19.070164, -47.10498, 24.893179, ...
115 [-178.13693, 117.8742, -29.652536, 19.494795, ...
116 [-192.67363, 94.144516, -42.384525, 28.747828,...
Name: path, Length: 117, dtype: object
# Stack the per-file MFCC vectors into one 2-D array of shape
# (n_files, 40).  The previous identity comprehension
# `[x for x in X_mfcc]` was a redundant copy - np.array can consume
# the Series' values directly.
X = np.array(X_mfcc.tolist())
# Notebook display: (117, 40) per the recorded output.
X.shape
(117, 40)
# Append a trailing channel axis so each sample is shaped (40, 1),
# the layout the Conv1D input layer below expects.
X = X[..., np.newaxis]
# Notebook display: (117, 40, 1) per the recorded output.
X.shape
(117, 40, 1)
# Import the OneHotEncoder class from scikit-learn
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the target labels: each of the 8 emotions becomes a
# binary indicator column, as required by categorical_crossentropy.
enc = OneHotEncoder()
# fit_transform expects 2-D input, hence the double-bracket column
# selection; the result is a SciPy sparse matrix.
y = enc.fit_transform(df[['Emotions']])
# Densify the sparse matrix so it can be indexed/fed to Keras directly.
y = y.toarray()
# Shape is (n_samples, n_classes) = (117, 8) per the recorded output.
print(y.shape)
(117, 8)
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

# Simple 80/20 hold-out split of the features and one-hot targets.
# NOTE(review): these four names are overwritten by the training loop
# further down before ever being used - confirm whether this hold-out
# split is still needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Stratified 5-fold cross-validation; stratifying on the raw string
# labels keeps each fold's class proportions close to the full dataset.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Print the train/test shapes of every fold as a sanity check.
for train_index, test_index in kfold.split(X, df['Emotions']):
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    print("Training set shape:", X_train_fold.shape, y_train_fold.shape)
    print("Testing set shape:", X_test_fold.shape, y_test_fold.shape)
    print("---------------------------------")
Training set shape: (93, 40, 1) (93, 8) Testing set shape: (24, 40, 1) (24, 8) --------------------------------- Training set shape: (93, 40, 1) (93, 8) Testing set shape: (24, 40, 1) (24, 8) --------------------------------- Training set shape: (94, 40, 1) (94, 8) Testing set shape: (23, 40, 1) (23, 8) --------------------------------- Training set shape: (94, 40, 1) (94, 8) Testing set shape: (23, 40, 1) (23, 8) --------------------------------- Training set shape: (94, 40, 1) (94, 8) Testing set shape: (23, 40, 1) (23, 8) ---------------------------------
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten
from keras.optimizers import Adam

# Adam optimizer with an explicitly chosen learning rate.
custom_learning_rate = 0.001
custom_optimizer = Adam(learning_rate=custom_learning_rate)

# CNN -> LSTM -> dense stack for 8-way emotion classification over
# (40, 1) mean-MFCC inputs.
model = Sequential()
# Convolutional front end over the 40 MFCC coefficients.
# NOTE(review): 265 filters is unusual - possibly a typo for 256;
# kept as-is to preserve the trained architecture.
model.add(Conv1D(265, kernel_size=5, activation='relu', input_shape=(40, 1)))
model.add(MaxPooling1D(pool_size=2))
# Recurrent layer; return_sequences=False keeps only the output of the
# final timestep for each input sequence.
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.2))
# Fully connected classifier head, with dropout between layers to
# reduce overfitting on this small (117-sample) dataset.
model.add(Dense(500, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.2))
# Softmax output: one probability per emotion class.
model.add(Dense(8, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer=custom_optimizer, metrics=['accuracy'])
model.summary()
WARNING:tensorflow:From C:\Users\Nagham\anaconda3\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
WARNING:tensorflow:From C:\Users\Nagham\anaconda3\Lib\site-packages\keras\src\backend.py:873: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
WARNING:tensorflow:From C:\Users\Nagham\anaconda3\Lib\site-packages\keras\src\backend.py:6642: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 36, 265) 1590
max_pooling1d (MaxPooling1 (None, 18, 265) 0
D)
lstm (LSTM) (None, 128) 201728
dropout (Dropout) (None, 128) 0
dense (Dense) (None, 500) 64500
dropout_1 (Dropout) (None, 500) 0
dense_1 (Dense) (None, 64) 32064
dropout_2 (Dropout) (None, 64) 0
dense_2 (Dense) (None, 16) 1040
dropout_3 (Dropout) (None, 16) 0
dense_3 (Dense) (None, 8) 136
=================================================================
Total params: 301058 (1.15 MB)
Trainable params: 301058 (1.15 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
# Train the model across the 5 stratified folds.
# NOTE(review): the SAME `model` instance is reused for every fold, so
# weights carry over from one fold to the next - later folds continue
# training on data they have effectively already seen, which inflates
# their validation accuracy (the logs show fold-5 val_accuracy reaching
# 1.0).  Rebuild and recompile the model inside the loop for an honest
# cross-validation estimate.
for train_index, val_index in kfold.split(X, df['Emotions']):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val),
                        epochs=30, batch_size=64, verbose=1)
Epoch 1/30 WARNING:tensorflow:From C:\Users\Nagham\anaconda3\Lib\site-packages\keras\src\utils\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead. WARNING:tensorflow:From C:\Users\Nagham\anaconda3\Lib\site-packages\keras\src\engine\base_layer_utils.py:384: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead. 2/2 [==============================] - 7s 1s/step - loss: 2.0846 - accuracy: 0.1075 - val_loss: 2.0541 - val_accuracy: 0.1667 Epoch 2/30 2/2 [==============================] - 0s 91ms/step - loss: 2.0635 - accuracy: 0.1290 - val_loss: 2.0444 - val_accuracy: 0.1667 Epoch 3/30 2/2 [==============================] - 0s 96ms/step - loss: 2.0857 - accuracy: 0.1290 - val_loss: 2.0494 - val_accuracy: 0.1667 Epoch 4/30 2/2 [==============================] - 0s 99ms/step - loss: 2.0552 - accuracy: 0.0968 - val_loss: 2.0451 - val_accuracy: 0.1667 Epoch 5/30 2/2 [==============================] - 0s 104ms/step - loss: 2.0687 - accuracy: 0.1505 - val_loss: 2.0421 - val_accuracy: 0.1667 Epoch 6/30 2/2 [==============================] - 0s 113ms/step - loss: 2.0433 - accuracy: 0.1613 - val_loss: 2.0427 - val_accuracy: 0.1667 Epoch 7/30 2/2 [==============================] - 0s 90ms/step - loss: 2.0503 - accuracy: 0.1398 - val_loss: 2.0484 - val_accuracy: 0.1250 Epoch 8/30 2/2 [==============================] - 0s 103ms/step - loss: 2.0928 - accuracy: 0.1720 - val_loss: 2.0504 - val_accuracy: 0.1667 Epoch 9/30 2/2 [==============================] - 0s 102ms/step - loss: 2.0373 - accuracy: 0.1613 - val_loss: 2.0463 - val_accuracy: 0.1667 Epoch 10/30 2/2 [==============================] - 0s 97ms/step - loss: 2.0634 - accuracy: 0.1505 - val_loss: 2.0401 - val_accuracy: 0.1667 Epoch 11/30 2/2 [==============================] - 0s 91ms/step - loss: 2.0555 - accuracy: 0.2043 - val_loss: 2.0391 - val_accuracy: 0.0833 Epoch 12/30 
2/2 [==============================] - 0s 101ms/step - loss: 2.0290 - accuracy: 0.1828 - val_loss: 2.0428 - val_accuracy: 0.1250 Epoch 13/30 2/2 [==============================] - 0s 90ms/step - loss: 2.0187 - accuracy: 0.2473 - val_loss: 2.0435 - val_accuracy: 0.1250 Epoch 14/30 2/2 [==============================] - 0s 91ms/step - loss: 2.0116 - accuracy: 0.2043 - val_loss: 2.0370 - val_accuracy: 0.1250 Epoch 15/30 2/2 [==============================] - 0s 100ms/step - loss: 1.9997 - accuracy: 0.2366 - val_loss: 2.0313 - val_accuracy: 0.0833 Epoch 16/30 2/2 [==============================] - 0s 97ms/step - loss: 2.0279 - accuracy: 0.1720 - val_loss: 2.0256 - val_accuracy: 0.1250 Epoch 17/30 2/2 [==============================] - 0s 102ms/step - loss: 2.0352 - accuracy: 0.2258 - val_loss: 2.0251 - val_accuracy: 0.1250 Epoch 18/30 2/2 [==============================] - 0s 98ms/step - loss: 1.9977 - accuracy: 0.2043 - val_loss: 2.0313 - val_accuracy: 0.1667 Epoch 19/30 2/2 [==============================] - 0s 85ms/step - loss: 1.9777 - accuracy: 0.2581 - val_loss: 2.0401 - val_accuracy: 0.1667 Epoch 20/30 2/2 [==============================] - 0s 105ms/step - loss: 1.9975 - accuracy: 0.1828 - val_loss: 2.0346 - val_accuracy: 0.1667 Epoch 21/30 2/2 [==============================] - 0s 107ms/step - loss: 1.9720 - accuracy: 0.2366 - val_loss: 2.0105 - val_accuracy: 0.1667 Epoch 22/30 2/2 [==============================] - 0s 101ms/step - loss: 1.9833 - accuracy: 0.2366 - val_loss: 2.0117 - val_accuracy: 0.2083 Epoch 23/30 2/2 [==============================] - 0s 94ms/step - loss: 2.0025 - accuracy: 0.1720 - val_loss: 2.0253 - val_accuracy: 0.1250 Epoch 24/30 2/2 [==============================] - 0s 104ms/step - loss: 1.9596 - accuracy: 0.2473 - val_loss: 2.0213 - val_accuracy: 0.1667 Epoch 25/30 2/2 [==============================] - 0s 99ms/step - loss: 1.9402 - accuracy: 0.2366 - val_loss: 1.9972 - val_accuracy: 0.2083 Epoch 26/30 2/2 
[==============================] - 0s 103ms/step - loss: 1.9337 - accuracy: 0.2258 - val_loss: 2.0127 - val_accuracy: 0.1667 Epoch 27/30 2/2 [==============================] - 0s 93ms/step - loss: 1.9036 - accuracy: 0.2581 - val_loss: 2.0469 - val_accuracy: 0.2083 Epoch 28/30 2/2 [==============================] - 0s 106ms/step - loss: 1.9233 - accuracy: 0.2258 - val_loss: 2.0010 - val_accuracy: 0.1667 Epoch 29/30 2/2 [==============================] - 0s 100ms/step - loss: 1.8692 - accuracy: 0.2796 - val_loss: 1.9806 - val_accuracy: 0.1250 Epoch 30/30 2/2 [==============================] - 0s 103ms/step - loss: 1.9033 - accuracy: 0.1828 - val_loss: 1.9828 - val_accuracy: 0.2083 Epoch 1/30 2/2 [==============================] - 0s 183ms/step - loss: 1.8959 - accuracy: 0.2258 - val_loss: 1.8892 - val_accuracy: 0.2500 Epoch 2/30 2/2 [==============================] - 0s 116ms/step - loss: 1.9044 - accuracy: 0.2473 - val_loss: 1.8679 - val_accuracy: 0.3333 Epoch 3/30 2/2 [==============================] - 0s 106ms/step - loss: 1.8717 - accuracy: 0.2903 - val_loss: 1.8680 - val_accuracy: 0.3750 Epoch 4/30 2/2 [==============================] - 0s 93ms/step - loss: 1.7870 - accuracy: 0.2796 - val_loss: 1.8660 - val_accuracy: 0.3750 Epoch 5/30 2/2 [==============================] - 0s 122ms/step - loss: 1.7921 - accuracy: 0.3011 - val_loss: 1.8838 - val_accuracy: 0.3333 Epoch 6/30 2/2 [==============================] - 0s 99ms/step - loss: 1.7896 - accuracy: 0.3118 - val_loss: 1.9037 - val_accuracy: 0.3333 Epoch 7/30 2/2 [==============================] - 0s 100ms/step - loss: 1.8226 - accuracy: 0.3011 - val_loss: 1.9769 - val_accuracy: 0.3333 Epoch 8/30 2/2 [==============================] - 0s 101ms/step - loss: 1.7574 - accuracy: 0.3226 - val_loss: 2.0090 - val_accuracy: 0.4167 Epoch 9/30 2/2 [==============================] - 0s 103ms/step - loss: 1.7626 - accuracy: 0.3226 - val_loss: 2.0365 - val_accuracy: 0.2917 Epoch 10/30 2/2 [==============================] - 0s 
115ms/step - loss: 1.8839 - accuracy: 0.2151 - val_loss: 1.9222 - val_accuracy: 0.4167 Epoch 11/30 2/2 [==============================] - 0s 102ms/step - loss: 1.7191 - accuracy: 0.3226 - val_loss: 1.9861 - val_accuracy: 0.3333 Epoch 12/30 2/2 [==============================] - 0s 113ms/step - loss: 1.7686 - accuracy: 0.3011 - val_loss: 1.9418 - val_accuracy: 0.3333 Epoch 13/30 2/2 [==============================] - 0s 101ms/step - loss: 1.6426 - accuracy: 0.3656 - val_loss: 1.9036 - val_accuracy: 0.2917 Epoch 14/30 2/2 [==============================] - 0s 109ms/step - loss: 1.7660 - accuracy: 0.2473 - val_loss: 1.9379 - val_accuracy: 0.3750 Epoch 15/30 2/2 [==============================] - 0s 115ms/step - loss: 1.5925 - accuracy: 0.4301 - val_loss: 2.0154 - val_accuracy: 0.3750 Epoch 16/30 2/2 [==============================] - 0s 108ms/step - loss: 1.7268 - accuracy: 0.3548 - val_loss: 2.0279 - val_accuracy: 0.3750 Epoch 17/30 2/2 [==============================] - 0s 100ms/step - loss: 1.6732 - accuracy: 0.3656 - val_loss: 2.0637 - val_accuracy: 0.3333 Epoch 18/30 2/2 [==============================] - 0s 106ms/step - loss: 1.6049 - accuracy: 0.3871 - val_loss: 2.0919 - val_accuracy: 0.3333 Epoch 19/30 2/2 [==============================] - 0s 110ms/step - loss: 1.5921 - accuracy: 0.3656 - val_loss: 2.0304 - val_accuracy: 0.3333 Epoch 20/30 2/2 [==============================] - 0s 114ms/step - loss: 1.4868 - accuracy: 0.4194 - val_loss: 2.1893 - val_accuracy: 0.2917 Epoch 21/30 2/2 [==============================] - 0s 84ms/step - loss: 1.5031 - accuracy: 0.4086 - val_loss: 2.1130 - val_accuracy: 0.3750 Epoch 22/30 2/2 [==============================] - 0s 112ms/step - loss: 1.4784 - accuracy: 0.3871 - val_loss: 2.0412 - val_accuracy: 0.1667 Epoch 23/30 2/2 [==============================] - 0s 110ms/step - loss: 1.4879 - accuracy: 0.4194 - val_loss: 2.1486 - val_accuracy: 0.2500 Epoch 24/30 2/2 [==============================] - 0s 100ms/step - loss: 1.4624 
- accuracy: 0.4086 - val_loss: 2.1283 - val_accuracy: 0.3750 Epoch 25/30 2/2 [==============================] - 0s 106ms/step - loss: 1.4547 - accuracy: 0.4624 - val_loss: 2.0240 - val_accuracy: 0.2917 Epoch 26/30 2/2 [==============================] - 0s 97ms/step - loss: 1.3366 - accuracy: 0.4409 - val_loss: 2.0938 - val_accuracy: 0.2917 Epoch 27/30 2/2 [==============================] - 0s 88ms/step - loss: 1.3613 - accuracy: 0.4624 - val_loss: 1.9381 - val_accuracy: 0.2917 Epoch 28/30 2/2 [==============================] - 0s 113ms/step - loss: 1.4278 - accuracy: 0.4301 - val_loss: 2.1927 - val_accuracy: 0.3333 Epoch 29/30 2/2 [==============================] - 0s 170ms/step - loss: 1.3061 - accuracy: 0.5161 - val_loss: 2.1611 - val_accuracy: 0.3333 Epoch 30/30 2/2 [==============================] - 0s 113ms/step - loss: 1.3033 - accuracy: 0.4839 - val_loss: 2.0680 - val_accuracy: 0.1667 Epoch 1/30 2/2 [==============================] - 0s 237ms/step - loss: 1.5211 - accuracy: 0.4468 - val_loss: 1.1646 - val_accuracy: 0.5652 Epoch 2/30 2/2 [==============================] - 0s 99ms/step - loss: 1.4600 - accuracy: 0.4255 - val_loss: 1.0303 - val_accuracy: 0.6957 Epoch 3/30 2/2 [==============================] - 0s 94ms/step - loss: 1.4971 - accuracy: 0.4362 - val_loss: 1.0748 - val_accuracy: 0.7391 Epoch 4/30 2/2 [==============================] - 0s 91ms/step - loss: 1.4684 - accuracy: 0.3936 - val_loss: 1.2188 - val_accuracy: 0.6087 Epoch 5/30 2/2 [==============================] - 0s 112ms/step - loss: 1.4260 - accuracy: 0.4681 - val_loss: 1.1968 - val_accuracy: 0.6522 Epoch 6/30 2/2 [==============================] - 0s 105ms/step - loss: 1.3886 - accuracy: 0.4787 - val_loss: 1.3807 - val_accuracy: 0.4348 Epoch 7/30 2/2 [==============================] - 0s 105ms/step - loss: 1.3656 - accuracy: 0.4468 - val_loss: 1.2751 - val_accuracy: 0.4348 Epoch 8/30 2/2 [==============================] - 0s 113ms/step - loss: 1.2660 - accuracy: 0.4894 - val_loss: 1.1770 
- val_accuracy: 0.6087 Epoch 9/30 2/2 [==============================] - 0s 180ms/step - loss: 1.3564 - accuracy: 0.4574 - val_loss: 1.1519 - val_accuracy: 0.7391 Epoch 10/30 2/2 [==============================] - 0s 87ms/step - loss: 1.2073 - accuracy: 0.4574 - val_loss: 1.2228 - val_accuracy: 0.6522 Epoch 11/30 2/2 [==============================] - 0s 98ms/step - loss: 1.3198 - accuracy: 0.5000 - val_loss: 1.1375 - val_accuracy: 0.6087 Epoch 12/30 2/2 [==============================] - 0s 78ms/step - loss: 1.2308 - accuracy: 0.5106 - val_loss: 1.1119 - val_accuracy: 0.6957 Epoch 13/30 2/2 [==============================] - 0s 78ms/step - loss: 1.1203 - accuracy: 0.5957 - val_loss: 1.2781 - val_accuracy: 0.5217 Epoch 14/30 2/2 [==============================] - 0s 84ms/step - loss: 1.1855 - accuracy: 0.5851 - val_loss: 1.1338 - val_accuracy: 0.6087 Epoch 15/30 2/2 [==============================] - 0s 99ms/step - loss: 1.1292 - accuracy: 0.6383 - val_loss: 1.2988 - val_accuracy: 0.5652 Epoch 16/30 2/2 [==============================] - 0s 99ms/step - loss: 1.1391 - accuracy: 0.5851 - val_loss: 1.2716 - val_accuracy: 0.5652 Epoch 17/30 2/2 [==============================] - 0s 96ms/step - loss: 1.1659 - accuracy: 0.5532 - val_loss: 1.3304 - val_accuracy: 0.5652 Epoch 18/30 2/2 [==============================] - 0s 89ms/step - loss: 1.0954 - accuracy: 0.6277 - val_loss: 1.3722 - val_accuracy: 0.5217 Epoch 19/30 2/2 [==============================] - 0s 93ms/step - loss: 1.0048 - accuracy: 0.6809 - val_loss: 1.3853 - val_accuracy: 0.4348 Epoch 20/30 2/2 [==============================] - 0s 92ms/step - loss: 0.9899 - accuracy: 0.6170 - val_loss: 1.3477 - val_accuracy: 0.4783 Epoch 21/30 2/2 [==============================] - 0s 80ms/step - loss: 0.9021 - accuracy: 0.6489 - val_loss: 1.3180 - val_accuracy: 0.5652 Epoch 22/30 2/2 [==============================] - 0s 74ms/step - loss: 0.9151 - accuracy: 0.6489 - val_loss: 1.2485 - val_accuracy: 0.6087 Epoch 23/30 2/2 
[==============================] - 0s 78ms/step - loss: 0.9635 - accuracy: 0.6489 - val_loss: 1.5807 - val_accuracy: 0.6087 Epoch 24/30 2/2 [==============================] - 0s 80ms/step - loss: 0.9191 - accuracy: 0.6596 - val_loss: 1.4197 - val_accuracy: 0.5217 Epoch 25/30 2/2 [==============================] - 0s 94ms/step - loss: 1.0796 - accuracy: 0.6170 - val_loss: 1.3572 - val_accuracy: 0.4783 Epoch 26/30 2/2 [==============================] - 0s 88ms/step - loss: 0.8874 - accuracy: 0.6383 - val_loss: 1.3993 - val_accuracy: 0.5652 Epoch 27/30 2/2 [==============================] - 0s 78ms/step - loss: 0.8345 - accuracy: 0.6702 - val_loss: 1.6149 - val_accuracy: 0.3913 Epoch 28/30 2/2 [==============================] - 0s 95ms/step - loss: 0.9446 - accuracy: 0.5957 - val_loss: 1.5465 - val_accuracy: 0.5217 Epoch 29/30 2/2 [==============================] - 0s 83ms/step - loss: 0.7344 - accuracy: 0.7553 - val_loss: 1.6509 - val_accuracy: 0.4783 Epoch 30/30 2/2 [==============================] - 0s 81ms/step - loss: 0.8865 - accuracy: 0.6809 - val_loss: 1.6924 - val_accuracy: 0.4783 Epoch 1/30 2/2 [==============================] - 0s 200ms/step - loss: 0.8916 - accuracy: 0.6596 - val_loss: 0.6197 - val_accuracy: 0.7826 Epoch 2/30 2/2 [==============================] - 0s 98ms/step - loss: 1.2018 - accuracy: 0.6489 - val_loss: 0.4735 - val_accuracy: 0.8696 Epoch 3/30 2/2 [==============================] - 0s 81ms/step - loss: 0.8690 - accuracy: 0.7234 - val_loss: 0.5649 - val_accuracy: 0.7826 Epoch 4/30 2/2 [==============================] - 0s 77ms/step - loss: 0.8225 - accuracy: 0.7128 - val_loss: 0.6662 - val_accuracy: 0.7391 Epoch 5/30 2/2 [==============================] - 0s 83ms/step - loss: 0.9389 - accuracy: 0.6915 - val_loss: 0.7839 - val_accuracy: 0.7391 Epoch 6/30 2/2 [==============================] - 0s 82ms/step - loss: 0.9358 - accuracy: 0.6702 - val_loss: 0.7369 - val_accuracy: 0.6522 Epoch 7/30 2/2 [==============================] - 0s 
84ms/step - loss: 0.9115 - accuracy: 0.6596 - val_loss: 0.6475 - val_accuracy: 0.6957 Epoch 8/30 2/2 [==============================] - 0s 77ms/step - loss: 0.8486 - accuracy: 0.7021 - val_loss: 0.5897 - val_accuracy: 0.9130 Epoch 9/30 2/2 [==============================] - 0s 82ms/step - loss: 0.9881 - accuracy: 0.7021 - val_loss: 0.7677 - val_accuracy: 0.6957 Epoch 10/30 2/2 [==============================] - 0s 83ms/step - loss: 0.7951 - accuracy: 0.7340 - val_loss: 0.6826 - val_accuracy: 0.6957 Epoch 11/30 2/2 [==============================] - 0s 93ms/step - loss: 0.7163 - accuracy: 0.7553 - val_loss: 0.9755 - val_accuracy: 0.5652 Epoch 12/30 2/2 [==============================] - 0s 92ms/step - loss: 0.9094 - accuracy: 0.6809 - val_loss: 1.0620 - val_accuracy: 0.5217 Epoch 13/30 2/2 [==============================] - 0s 86ms/step - loss: 0.7003 - accuracy: 0.7553 - val_loss: 1.1490 - val_accuracy: 0.5652 Epoch 14/30 2/2 [==============================] - 0s 80ms/step - loss: 0.7850 - accuracy: 0.7340 - val_loss: 1.0436 - val_accuracy: 0.6087 Epoch 15/30 2/2 [==============================] - 0s 77ms/step - loss: 0.8099 - accuracy: 0.7128 - val_loss: 0.9802 - val_accuracy: 0.6087 Epoch 16/30 2/2 [==============================] - 0s 105ms/step - loss: 0.7698 - accuracy: 0.7660 - val_loss: 0.9343 - val_accuracy: 0.6957 Epoch 17/30 2/2 [==============================] - 0s 86ms/step - loss: 0.6334 - accuracy: 0.8191 - val_loss: 0.9635 - val_accuracy: 0.6522 Epoch 18/30 2/2 [==============================] - 0s 75ms/step - loss: 0.5857 - accuracy: 0.8085 - val_loss: 1.0266 - val_accuracy: 0.6087 Epoch 19/30 2/2 [==============================] - 0s 81ms/step - loss: 0.5806 - accuracy: 0.8298 - val_loss: 1.0433 - val_accuracy: 0.5652 Epoch 20/30 2/2 [==============================] - 0s 86ms/step - loss: 0.5503 - accuracy: 0.8511 - val_loss: 0.9916 - val_accuracy: 0.6522 Epoch 21/30 2/2 [==============================] - 0s 90ms/step - loss: 0.5127 - accuracy: 
0.8404 - val_loss: 1.1432 - val_accuracy: 0.6522 Epoch 22/30 2/2 [==============================] - 0s 83ms/step - loss: 0.5503 - accuracy: 0.8191 - val_loss: 0.9680 - val_accuracy: 0.6087 Epoch 23/30 2/2 [==============================] - 0s 103ms/step - loss: 0.5309 - accuracy: 0.8298 - val_loss: 1.0979 - val_accuracy: 0.6087 Epoch 24/30 2/2 [==============================] - 0s 109ms/step - loss: 0.6752 - accuracy: 0.7660 - val_loss: 1.0591 - val_accuracy: 0.6522 Epoch 25/30 2/2 [==============================] - 0s 110ms/step - loss: 0.4820 - accuracy: 0.8191 - val_loss: 1.1448 - val_accuracy: 0.6522 Epoch 26/30 2/2 [==============================] - 0s 96ms/step - loss: 0.5894 - accuracy: 0.7872 - val_loss: 1.1030 - val_accuracy: 0.6522 Epoch 27/30 2/2 [==============================] - 0s 82ms/step - loss: 0.4961 - accuracy: 0.8191 - val_loss: 1.1809 - val_accuracy: 0.6087 Epoch 28/30 2/2 [==============================] - 0s 87ms/step - loss: 0.4019 - accuracy: 0.8511 - val_loss: 1.3452 - val_accuracy: 0.6087 Epoch 29/30 2/2 [==============================] - 0s 85ms/step - loss: 0.3497 - accuracy: 0.8723 - val_loss: 1.3843 - val_accuracy: 0.6087 Epoch 30/30 2/2 [==============================] - 0s 82ms/step - loss: 0.5623 - accuracy: 0.7872 - val_loss: 1.2802 - val_accuracy: 0.5652 Epoch 1/30 2/2 [==============================] - 0s 167ms/step - loss: 0.7559 - accuracy: 0.7234 - val_loss: 0.0964 - val_accuracy: 1.0000 Epoch 2/30 2/2 [==============================] - 0s 106ms/step - loss: 0.6803 - accuracy: 0.7553 - val_loss: 0.1103 - val_accuracy: 1.0000 Epoch 3/30 2/2 [==============================] - 0s 97ms/step - loss: 0.6019 - accuracy: 0.7234 - val_loss: 0.2206 - val_accuracy: 0.9130 Epoch 4/30 2/2 [==============================] - 0s 158ms/step - loss: 0.6992 - accuracy: 0.7553 - val_loss: 0.1785 - val_accuracy: 1.0000 Epoch 5/30 2/2 [==============================] - 0s 96ms/step - loss: 0.6607 - accuracy: 0.7340 - val_loss: 0.1425 - 
val_accuracy: 1.0000 Epoch 6/30 2/2 [==============================] - 0s 94ms/step - loss: 0.5973 - accuracy: 0.7660 - val_loss: 0.1250 - val_accuracy: 1.0000 Epoch 7/30 2/2 [==============================] - 0s 81ms/step - loss: 0.5236 - accuracy: 0.8404 - val_loss: 0.1226 - val_accuracy: 1.0000 Epoch 8/30 2/2 [==============================] - 0s 106ms/step - loss: 0.6054 - accuracy: 0.7872 - val_loss: 0.1746 - val_accuracy: 0.9565 Epoch 9/30 2/2 [==============================] - 0s 106ms/step - loss: 0.5507 - accuracy: 0.8404 - val_loss: 0.1575 - val_accuracy: 1.0000 Epoch 10/30 2/2 [==============================] - 0s 136ms/step - loss: 0.3238 - accuracy: 0.9149 - val_loss: 0.1796 - val_accuracy: 0.9565 Epoch 11/30 2/2 [==============================] - 0s 116ms/step - loss: 0.3936 - accuracy: 0.9043 - val_loss: 0.2570 - val_accuracy: 0.9130 Epoch 12/30 2/2 [==============================] - 0s 108ms/step - loss: 0.5442 - accuracy: 0.7872 - val_loss: 0.2527 - val_accuracy: 0.9130 Epoch 13/30 2/2 [==============================] - 0s 105ms/step - loss: 0.2900 - accuracy: 0.9043 - val_loss: 0.2413 - val_accuracy: 0.8696 Epoch 14/30 2/2 [==============================] - 0s 110ms/step - loss: 0.3175 - accuracy: 0.9043 - val_loss: 0.2473 - val_accuracy: 0.9130 Epoch 15/30 2/2 [==============================] - 0s 105ms/step - loss: 0.4276 - accuracy: 0.8298 - val_loss: 0.2484 - val_accuracy: 0.9130 Epoch 16/30 2/2 [==============================] - 0s 94ms/step - loss: 0.3289 - accuracy: 0.8723 - val_loss: 0.2815 - val_accuracy: 0.9130 Epoch 17/30 2/2 [==============================] - 0s 125ms/step - loss: 0.3498 - accuracy: 0.8830 - val_loss: 0.2930 - val_accuracy: 0.9130 Epoch 18/30 2/2 [==============================] - 0s 115ms/step - loss: 0.3191 - accuracy: 0.9043 - val_loss: 0.2558 - val_accuracy: 0.9130 Epoch 19/30 2/2 [==============================] - 0s 121ms/step - loss: 0.3363 - accuracy: 0.8936 - val_loss: 0.2086 - val_accuracy: 0.9565 Epoch 20/30 
2/2 [==============================] - 0s 133ms/step - loss: 0.3060 - accuracy: 0.9043 - val_loss: 0.1857 - val_accuracy: 0.9565 Epoch 21/30 2/2 [==============================] - 0s 106ms/step - loss: 0.3141 - accuracy: 0.8723 - val_loss: 0.1951 - val_accuracy: 0.9130 Epoch 22/30 2/2 [==============================] - 0s 101ms/step - loss: 0.2270 - accuracy: 0.9043 - val_loss: 0.2186 - val_accuracy: 0.9130 Epoch 23/30 2/2 [==============================] - 0s 112ms/step - loss: 0.3515 - accuracy: 0.8511 - val_loss: 0.2037 - val_accuracy: 0.9130 Epoch 24/30 2/2 [==============================] - 0s 98ms/step - loss: 0.1934 - accuracy: 0.9255 - val_loss: 0.1916 - val_accuracy: 0.9565 Epoch 25/30 2/2 [==============================] - 0s 122ms/step - loss: 0.2769 - accuracy: 0.8830 - val_loss: 0.2051 - val_accuracy: 0.9130 Epoch 26/30 2/2 [==============================] - 0s 137ms/step - loss: 0.2730 - accuracy: 0.8830 - val_loss: 0.1932 - val_accuracy: 0.9565 Epoch 27/30 2/2 [==============================] - 0s 146ms/step - loss: 0.2991 - accuracy: 0.9149 - val_loss: 0.2111 - val_accuracy: 0.9565 Epoch 28/30 2/2 [==============================] - 0s 129ms/step - loss: 0.2672 - accuracy: 0.8936 - val_loss: 0.2941 - val_accuracy: 0.9130 Epoch 29/30 2/2 [==============================] - 0s 117ms/step - loss: 0.2740 - accuracy: 0.8936 - val_loss: 0.3528 - val_accuracy: 0.9130 Epoch 30/30 2/2 [==============================] - 0s 106ms/step - loss: 0.3092 - accuracy: 0.8723 - val_loss: 0.3562 - val_accuracy: 0.9130
#model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
import tensorflow  # TensorFlow backend used to build and train the NN models
from tensorflow import keras  # high-level interface for building neural networks
import matplotlib.pyplot as plt  # figure/axes plotting
import matplotlib.cm as cm  # colormaps for mapping numeric values to colors

# Visualize how training and validation loss evolved across the epochs.
plt.title('Training & Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
# Draw both curves from the Keras History object in the same order as before.
for key, tag in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[key], label=tag)
plt.legend()
plt.show()
from tensorflow import keras  # high-level interface for building neural networks
import matplotlib.pyplot as plt  # figure/axes plotting
import matplotlib.cm as cm  # colormaps for mapping numeric values to colors

# Visualize how training and validation accuracy evolved across the epochs.
plt.title('Training & Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The validation curve shows how well the fitted model generalizes
# to data it never trained on.
for key, tag in (('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')):
    plt.plot(history.history[key], label=tag)
plt.legend()
plt.show()
import librosa

# Load the clip at its native sampling rate (sr=None disables resampling).
audio_path = "C:/Users/Nagham/Downloads/deep learning/project/IMG_9493_[cut_7sec] [vocals].wav"
y, sr = librosa.load(audio_path, sr=None)

# Compute 40 Mel-frequency cepstral coefficients per frame.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

# Shape is (n_mfcc, n_frames).
print(mfccs.shape)
(40, 604)
# Flatten the 2-D (n_mfcc, n_frames) MFCC matrix into a single row vector —
# one sample per row, as a model's dense input layer would expect.
input_data = np.reshape(mfccs, (1, -1))
input_data
array([[-531.93524 , -530.9951 , -531.39087 , ..., -6.6996393,
-4.303115 , -4.2558637]], dtype=float32)
input_data.shape #print the shape of the input_data array
(1, 24160)
# Collapse the time axis: average each of the 40 MFCC coefficients across
# all frames. keepdims=True keeps a trailing axis of length 1, so the
# result has shape (40, 1) instead of (40,).
mfccs_minimized = mfccs.mean(axis=1, keepdims=True)
print(mfccs_minimized.shape)
(40, 1)
# Persist the trained emotion-detection model (architecture, weights, and
# optimizer state) so it can be reloaded later without retraining.
model.save('path/to/emotion_detection.h5')
# The .h5 extension stands for Hierarchical Data Format version 5 (HDF5),
# a container format widely used in scientific and ML workflows for
# storing and managing large amounts of structured data.
import tensorflow as tf
from pydub import AudioSegment

# Restore the previously saved model (architecture + weights) from HDF5.
model = tf.keras.models.load_model('path/to/emotion_detection.h5')

# Class labels indexed by model output position; np.argmax over the
# prediction vector indexes into this list.
# NOTE(review): assumes this ordering matches the label encoding used at
# training time — confirm against the training pipeline.
emotion_list = ["Angry", "Shock", "Excited", "Happy", "Love", "Nervous", "Sad", "Scared"]
def predict_emotion(audio_file_path):
    """Predict the emotion expressed in a WAV file.

    Parameters
    ----------
    audio_file_path : str
        Path to a .wav file readable by pydub.

    Returns
    -------
    str
        The predicted label drawn from the module-level ``emotion_list``.
    """
    # Decode the WAV file using pydub.
    audio = AudioSegment.from_wav(audio_file_path)
    # BUG FIX: the original code passed an undefined name ``sample_rate``
    # to librosa, raising NameError at runtime. pydub exposes the clip's
    # sampling rate as ``frame_rate``.
    sample_rate = audio.frame_rate
    # Raw PCM samples as a float32 array.
    audio_array = np.array(audio.get_array_of_samples(), dtype=np.float32)
    # Peak-normalize to [-1, 1]; skip for all-zero (silent) input to
    # avoid a 0/0 division producing NaNs.
    peak = np.max(np.abs(audio_array))
    if peak > 0:
        audio_array /= peak
    # 40 MFCCs per frame, then averaged over the time axis -> shape (40, 1).
    mfccs = librosa.feature.mfcc(y=audio_array, sr=sample_rate, n_mfcc=40)
    mfccs = np.mean(mfccs, axis=1, keepdims=True)
    # Add channel then batch axes -> (1, 40, 1, 1).
    # NOTE(review): assumes the model was trained on inputs of this shape
    # — confirm against the training code.
    mfccs = np.expand_dims(mfccs, axis=-1)
    mfccs = np.expand_dims(mfccs, axis=0)
    # Softmax scores over the emotion classes.
    predictions = model.predict(mfccs)
    # Highest-scoring class index maps into the label list.
    predicted_class_index = int(np.argmax(predictions))
    return emotion_list[predicted_class_index]
# Example usage: classify a standalone clip and play it back inline.
audio_file_path = "C:/Users/Nagham/Downloads/deep learning/project/IMG_9493_[cut_7sec] [vocals].wav"
predicted_emotion = predict_emotion(audio_file_path)
print(f"Predicted Emotion: {predicted_emotion}")
# Render an inline audio player for the clip (Jupyter notebooks only).
ipd.Audio(audio_file_path)
1/1 [==============================] - 0s 497ms/step Predicted Emotion: Scared
# Example usage: classify a clip drawn from the training data directory
# (its folder name, "excited", is the expected label) and play it back.
audio_file_path = 'C:/Users/Nagham/Downloads/deep learning/project/voice/excited/AnyConv.com__IMG_9278.wav'
predicted_emotion = predict_emotion(audio_file_path)
print(f"Predicted Emotion: {predicted_emotion}")
# Render an inline audio player for the clip (Jupyter notebooks only).
ipd.Audio(audio_file_path)
1/1 [==============================] - 0s 482ms/step Predicted Emotion: Angry